Deployment: Model-agnostic methods

library(randomForest)
library(dplyr)
library(mltools)
library(data.table)
library(pdp)
library(plotly)
horas <- read.csv('hour.csv')
datos <- read.csv('day.csv')
kc_house <- read.csv('kc_house_data.csv')

1.- One dimensional Partial Dependence Plot.

The partial dependence plot shows the marginal effect of a feature on the predicted outcome of a previously fit model.

Apply PDP to the regression example of predicting bike rentals. Fit a random forest approximation for the prediction of bike rentals (cnt). Use the partial dependence plot to visualize the relationships the model learned. Use the slides shown in class as model.

#Siguiendo los pasos de la práctica 3, extraemos lo necesario para el análisis
paso1 <- datos %>% select(workingday,holiday)
datos$season <- as.factor(datos$season)

paso2 <- one_hot(as.data.table(datos$season))
paso2 <- paso2[,-4]
names(paso2) = c('springer','summer','fall')

misty <- datos %>% mutate(misty = case_when(weathersit == 2~ 1,
                                            weathersit != 2 ~ 0))

misty <- misty$misty

rain <- datos %>% mutate(rain = case_when(weathersit == 3| weathersit == 4~ 1,
                                            TRUE ~ 0))

rain <- rain$rain

paso5 <- datos %>% mutate(de_temp = temp*41, de_hum = hum*100, de_windspeed = windspeed*67)
paso5 <- paso5[,c(17,18,19)]

days_since_2011 <- as.numeric(difftime(as.Date(datos$dteday, format = '%Y-%m-%d'),as.Date('2011-01-01',format = '%Y-%m-%d'), units = "days"))

cnt <- datos$cnt

df <- cbind(paso1,paso2, misty,rain,paso5,days_since_2011,cnt)
model_rf <- randomForest(cnt~.,data=df)

pdp_days_since <- pdp::partial(model_rf, pred.var = 'days_since_2011', plot = F)
pdp_temp <- pdp::partial(model_rf, pred.var = 'de_temp', plot = F)
pdp_hum <- pdp::partial(model_rf, pred.var = 'de_hum', plot = F)
pdp_wind <- pdp::partial(model_rf, pred.var = 'de_windspeed', plot = F)
p1 <- ggplot(pdp_days_since, aes(x = days_since_2011, y = yhat)) +
  geom_line() +
  xlab("Days since 2011") + ylab('Partial Dependence')

p2 <- ggplot(pdp_temp, aes(x = de_temp, y = yhat)) +
  geom_line() +
  xlab("Temperature")

p3 <- ggplot(pdp_hum, aes(x = de_hum, y = yhat)) +
  geom_line() +
  xlab("Humidity")

p4 <- ggplot(pdp_wind, aes(x = de_windspeed, y = yhat)) +
  geom_line() +
  xlab("Windspeed")

subplot(p1,p2,p3,p4, shareY = T) %>%  
  layout(annotations = list( 
    list(x = 0.01 , y = 1.07, text = "Days since 2011", showarrow = F, xref='paper', yref='paper'), 
    list(x = 0.3 , y = 1.07, text = "Temperature", showarrow = F, xref='paper', yref='paper'),
    list(x = 0.63 , y = 1.07, text = "Humidity", showarrow = F, xref='paper', yref='paper'),
    list(x = 0.95 , y = 1.07, text = "Windspeed", showarrow = F, xref='paper', yref='paper')))

Analyse the influence of days since 2011, temperature, humidity and wind speed on the predicted bike counts.

A medida que avanza el tiempo, el modelo predice que aumenta el número de bicicletas alquiladas, esto es normal, ya que el servicio se da más a conocer a lo largo del tiempo. For warm but not too hot climates, a large number of rented bikes is predicted. Yet, from temperatures over 27 ºC, the number of rented bikes decrease(too much heat). It appears that cyclists are increasingly inhibited from renting a bike when humidity exceeds 60%. Finally, the windier it gets, the less people like to ride a bike. which is logic. It appears that the model predicts the same from 25 km/h, maybe because there is little training data in that range.

LS0tDQp0aXRsZTogIlBSNSBERVBMT1lNRU5UIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCg0KIyBEZXBsb3ltZW50OiBNb2RlbC1hZ25vc3RpYyBtZXRob2RzDQoNCmBgYHtyfQ0KbGlicmFyeShyYW5kb21Gb3Jlc3QpDQpsaWJyYXJ5KGRwbHlyKQ0KbGlicmFyeShtbHRvb2xzKQ0KbGlicmFyeShkYXRhLnRhYmxlKQ0KbGlicmFyeShwZHApDQpsaWJyYXJ5KHBsb3RseSkNCmBgYA0KDQpgYGB7cn0NCmhvcmFzIDwtIHJlYWQuY3N2KCdob3VyLmNzdicpDQpkYXRvcyA8LSByZWFkLmNzdignZGF5LmNzdicpDQprY19ob3VzZSA8LSByZWFkLmNzdigna2NfaG91c2VfZGF0YS5jc3YnKQ0KYGBgDQoNCiMjIyAqKjEuLSBPbmUgZGltZW5zaW9uYWwgUGFydGlhbCBEZXBlbmRlbmNlIFBsb3QuKioNCg0KVGhlIHBhcnRpYWwgZGVwZW5kZW5jZSBwbG90IHNob3dzIHRoZSBtYXJnaW5hbCBlZmZlY3Qgb2YgYSBmZWF0dXJlIG9uIHRoZSBwcmVkaWN0ZWQgb3V0Y29tZSBvZiBhIHByZXZpb3VzbHkgZml0IG1vZGVsLg0KDQpBcHBseSBQRFAgdG8gdGhlIHJlZ3Jlc3Npb24gZXhhbXBsZSBvZiBwcmVkaWN0aW5nIGJpa2UgcmVudGFscy4gRml0IGEgcmFuZG9tIGZvcmVzdCBhcHByb3hpbWF0aW9uIGZvciB0aGUgcHJlZGljdGlvbiBvZiBiaWtlIHJlbnRhbHMgKCoqY250KiopLiBVc2UgdGhlIHBhcnRpYWwgZGVwZW5kZW5jZSBwbG90IHRvIHZpc3VhbGl6ZSB0aGUgcmVsYXRpb25zaGlwcyB0aGUgbW9kZWwgbGVhcm5lZC4gVXNlIHRoZSBzbGlkZXMgc2hvd24gaW4gY2xhc3MgYXMgbW9kZWwuDQoNCmBgYHtyfQ0KI1NpZ3VpZW5kbyBsb3MgcGFzb3MgZGUgbGEgcHLDoWN0aWNhIDMsIGV4dHJhZW1vcyBsbyBuZWNlc2FyaW8gcGFyYSBlbCBhbsOhbGlzaXMNCnBhc28xIDwtIGRhdG9zICU+JSBzZWxlY3Qod29ya2luZ2RheSxob2xpZGF5KQ0KZGF0b3Mkc2Vhc29uIDwtIGFzLmZhY3RvcihkYXRvcyRzZWFzb24pDQoNCnBhc28yIDwtIG9uZV9ob3QoYXMuZGF0YS50YWJsZShkYXRvcyRzZWFzb24pKQ0KcGFzbzIgPC0gcGFzbzJbLC00XQ0KbmFtZXMocGFzbzIpID0gYygnc3ByaW5nZXInLCdzdW1tZXInLCdmYWxsJykNCg0KbWlzdHkgPC0gZGF0b3MgJT4lIG11dGF0ZShtaXN0eSA9IGNhc2Vfd2hlbih3ZWF0aGVyc2l0ID09IDJ+IDEsDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHdlYXRoZXJzaXQgIT0gMiB+IDApKQ0KDQptaXN0eSA8LSBtaXN0eSRtaXN0eQ0KDQpyYWluIDwtIGRhdG9zICU+JSBtdXRhdGUocmFpbiA9IGNhc2Vfd2hlbih3ZWF0aGVyc2l0ID09IDN8IHdlYXRoZXJzaXQgPT0gNH4gMSwNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgVFJVRSB+IDApKQ0KDQpyYWluIDwtIHJhaW4kcmFpbg0KDQpwYXNvNSA8LSBkYXRvcyAlPiUgbXV0YXRlKGRlX3RlbXAgPSB0ZW1wKjQxLCBkZV9odW0gPSBodW0qMTAwLCBkZV93aW5kc3BlZWQgPSB3aW5kc3BlZWQqNjcpDQpwYXNvNSA8LSBwYXNvNVssYygxNywxOCwxOSldDQoNCmRheXNfc2luY2VfMjAxMSA8LSBhcy5udW1lcmljKGRpZmZ0aW1lKGFzLkRhdGUoZGF0b3MkZHRlZGF5LCBmb3JtYXQgPSAnJVktJW0tJWQnKSxhcy5EYXRlKCcyMDExLTAxLTAxJyxmb3JtYXQgPSAnJVktJW0tJWQnKSwgdW5pdHMgPSAiZGF5cyIpKQ0KDQpjbnQgPC0gZGF0b3MkY250DQoNCmRmIDwtIGNiaW5kKHBhc28xLHBhc28yLCBtaXN0eSxyYWluLHBhc281LGRheXNfc2luY2VfMjAxMSxjbnQpDQpgYGANCg0KYGBge3J9DQptb2RlbF9yZiA8LSByYW5kb21Gb3Jlc3QoY250fi4sZGF0YT1kZikNCg0KcGRwX2RheXNfc2luY2UgPC0gcGRwOjpwYXJ0aWFsKG1vZGVsX3JmLCBwcmVkLnZhciA9ICdkYXlzX3NpbmNlXzIwMTEnLCBwbG90ID0gRikNCnBkcF90ZW1wIDwtIHBkcDo6cGFydGlhbChtb2RlbF9yZiwgcHJlZC52YXIgPSAnZGVfdGVtcCcsIHBsb3QgPSBGKQ0KcGRwX2h1bSA8LSBwZHA6OnBhcnRpYWwobW9kZWxfcmYsIHByZWQudmFyID0gJ2RlX2h1bScsIHBsb3QgPSBGKQ0KcGRwX3dpbmQgPC0gcGRwOjpwYXJ0aWFsKG1vZGVsX3JmLCBwcmVkLnZhciA9ICdkZV93aW5kc3BlZWQnLCBwbG90ID0gRikNCmBgYA0KDQpgYGB7cn0NCnAxIDwtIGdncGxvdChwZHBfZGF5c19zaW5jZSwgYWVzKHggPSBkYXlzX3NpbmNlXzIwMTEsIHkgPSB5aGF0KSkgKw0KICBnZW9tX2xpbmUoKSArDQogIHhsYWIoIkRheXMgc2luY2UgMjAxMSIpICsgeWxhYignUGFydGlhbCBEZXBlbmRlbmNlJykNCg0KcDIgPC0gZ2dwbG90KHBkcF90ZW1wLCBhZXMoeCA9IGRlX3RlbXAsIHkgPSB5aGF0KSkgKw0KICBnZW9tX2xpbmUoKSArDQogIHhsYWIoIlRlbXBlcmF0dXJlIikNCg0KcDMgPC0gZ2dwbG90KHBkcF9odW0sIGFlcyh4ID0gZGVfaHVtLCB5ID0geWhhdCkpICsNCiAgZ2VvbV9saW5lKCkgKw0KICB4bGFiKCJIdW1pZGl0eSIpDQoNCnA0IDwtIGdncGxvdChwZHBfd2luZCwgYWVzKHggPSBkZV93aW5kc3BlZWQsIHkgPSB5aGF0KSkgKw0KICBnZW9tX2xpbmUoKSArDQogIHhsYWIoIldpbmRzcGVlZCIpDQoNCnN1YnBsb3QocDEscDIscDMscDQsIHNoYXJlWSA9IFQpICU+JSAgDQogIGxheW91dChhbm5vdGF0aW9ucyA9IGxpc3QoIA0KICAgIGxpc3QoeCA9IDAuMDEgLCB5ID0gMS4wNywgdGV4dCA9ICJEYXlzIHNpbmNlIDIwMTEiLCBzaG93YXJyb3cgPSBGLCB4cmVmPSdwYXBlcicsIHlyZWY9J3BhcGVyJyksIA0KICAgIGxpc3QoeCA9IDAuMyAsIHkgPSAxLjA3LCB0ZXh0ID0gIlRlbXBlcmF0dXJlIiwgc2hvd2Fycm93ID0gRiwgeHJlZj0ncGFwZXInLCB5cmVmPSdwYXBlcicpLA0KICAgIGxpc3QoeCA9IDAuNjMgLCB5ID0gMS4wNywgdGV4dCA9ICJIdW1pZGl0eSIsIHNob3dhcnJvdyA9IEYsIHhyZWY9J3BhcGVyJywgeXJlZj0ncGFwZXInKSwNCiAgICBsaXN0KHggPSAwLjk1ICwgeSA9IDEuMDcsIHRleHQgPSAiV2luZHNwZWVkIiwgc2hvd2Fycm93ID0gRiwgeHJlZj0ncGFwZXInLCB5cmVmPSdwYXBlcicpKSkNCmBgYA0KDQpBbmFseXNlIHRoZSBpbmZsdWVuY2Ugb2YgKipkYXlzIHNpbmNlIDIwMTEqKiwgKip0ZW1wZXJhdHVyZSoqLCAqKmh1bWlkaXR5KiogYW5kICoqd2luZCBzcGVlZCoqIG9uIHRoZSBwcmVkaWN0ZWQgYmlrZSBjb3VudHMuDQoNCkEgbWVkaWRhIHF1ZSBhdmFuemEgZWwgdGllbXBvLCBlbCBtb2RlbG8gcHJlZGljZSBxdWUgYXVtZW50YSBlbCBuw7ptZXJvIGRlIGJpY2ljbGV0YXMgYWxxdWlsYWRhcywgZXN0byBlcyBub3JtYWwsIHlhIHF1ZSBlbCBzZXJ2aWNpbyBzZSBkYSBtw6FzIGEgY29ub2NlciBhIGxvIGxhcmdvIGRlbCB0aWVtcG8uIEZvciB3YXJtIGJ1dCBub3QgdG9vIGhvdCBjbGltYXRlcywgYSBsYXJnZSBudW1iZXIgb2YgcmVudGVkIGJpa2VzIGlzIHByZWRpY3RlZC4gWWV0LCBmcm9tIHRlbXBlcmF0dXJlcyBvdmVyIDI3IMK6QywgdGhlIG51bWJlciBvZiByZW50ZWQgYmlrZXMgZGVjcmVhc2UodG9vIG11Y2ggaGVhdCkuIEl0IGFwcGVhcnMgdGhhdCBjeWNsaXN0cyBhcmUgaW5jcmVhc2luZ2x5IGluaGliaXRlZCBmcm9tIHJlbnRpbmcgYSBiaWtlIHdoZW4gaHVtaWRpdHkgZXhjZWVkcyA2MCUuIEZpbmFsbHksIHRoZSB3aW5kaWVyIGl0IGdldHMsIHRoZSBsZXNzIHBlb3BsZSBsaWtlIHRvIHJpZGUgYSBiaWtlLiB3aGljaCBpcyBsb2dpYy4gSXQgYXBwZWFycyB0aGF0IHRoZSBtb2RlbCBwcmVkaWN0cyB0aGUgc2FtZSBmcm9tIDI1IGttL2gsIG1heWJlIGJlY2F1c2UgdGhlcmUgaXMgbGl0dGxlIHRyYWluaW5nIGRhdGEgaW4gdGhhdCByYW5nZS4NCg==